from dash import Dash, dcc, html, Input, Output
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
# Source CSV (Figure Friday 2025, week 4); rows pandas cannot parse are skipped
file_path = "https://raw.githubusercontent.com/plotly/Figure-Friday/main/2025/week-4/Post45_NEAData_Final.csv"
data = pd.read_csv(file_path, on_bad_lines='skip')
print(data.columns)

# Create the Dash application object that the layout and callbacks attach to
app = Dash(__name__)

# Degree columns of the raw table, mapped to the labels shown in the charts
certification_columns = dict(
    ba="Bachelor's Degree",
    ba2="Additional Bachelor's Degree",
    ma="Master's Degree",
    ma2="Additional Master's Degree",
    phd="PhD",
    mfa="MFA",
    mfa2="Additional MFA",
)

# Reshape wide -> long: one row per (gender, degree column, institution)
certifications = data.melt(
    id_vars=['gender'],
    value_vars=list(certification_columns),
    var_name='certification_type',
    value_name='institution',
)
# Rows without an institution mean the person does not hold that degree
certifications = certifications.dropna(subset=['institution'])
# Swap the short column names for their human-readable labels
certifications['certification_type'] = certifications['certification_type'].map(certification_columns)

# Choropleth input: a copy of the raw table plus a single merged year column.
# NOTE(review): `other_nea_grant` takes precedence and `nea_grant_year` only
# fills its gaps -- confirm this is the intended year for each row.
choropleth_data = data.assign(
    grant_year=data['other_nea_grant'].fillna(data['nea_grant_year'])
)
# Clean grant_year column by handling cases like "1979, 1989" or "1987;1979"
def clean_grant_year(value):
    """Normalize a raw grant-year cell to a single integer year.

    Args:
        value: A cell from the merged year column. May be a number, a
            string holding one year, or a string listing several years
            separated by ',' or ';' (e.g. "1979, 1989" or "1987;1979").

    Returns:
        The first year as an ``int``, or ``None`` when no year can be
        parsed (missing values, empty strings, non-numeric text).
    """
    if isinstance(value, str):
        # Treat both delimiters alike and keep only the first entry.
        first = value.replace(';', ',').split(',', 1)[0].strip()
        try:
            return int(first)
        except ValueError:
            pass
        # Accept float-formatted years such as "1979.0", which appear when
        # a numeric column is read or stored as text.
        try:
            as_float = float(first)
        except ValueError:
            return None
        # Rejects "nan"/"inf" and fractional text instead of guessing.
        return int(as_float) if as_float.is_integer() else None
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # None / pd.NA raise TypeError, NaN raises ValueError and
        # infinities raise OverflowError; all mean "no usable year".
        return None
# Choropleth: normalise the merged year, discard rows without a usable year,
# then count rows per (state, year).
choropleth_data = (
    choropleth_data
    .assign(grant_year=choropleth_data['grant_year'].apply(clean_grant_year))
    .dropna(subset=['grant_year'])
    .groupby(['us_state', 'grant_year'])
    .size()
    .reset_index(name='count')
)

# Gender trends: same merged/cleaned year column, counted per (year, gender).
# NOTE(review): as for the choropleth, `other_nea_grant` wins over
# `nea_grant_year` when both are present -- confirm that is intended.
gender_trends = data.assign(
    grant_year=data['other_nea_grant']
    .fillna(data['nea_grant_year'])
    .apply(clean_grant_year)
)
gender_trends = gender_trends.dropna(subset=['grant_year', 'gender'])
gender_trends = (
    gender_trends
    .groupby(['grant_year', 'gender'])
    .size()
    .reset_index(name='count')
)

# Pareto chart: degrees per institution (summed over gender), largest first,
# with the running share of the overall total expressed in percent.
gender_by_university = (
    certifications
    .groupby(['institution', 'gender'])
    .size()
    .reset_index(name='count')
)
university_totals = (
    gender_by_university
    .groupby('institution')['count']
    .sum()
    .reset_index()
    .sort_values(by='count', ascending=False)
)
university_totals['cumulative_percentage'] = (
    university_totals['count'].cumsum() / university_totals['count'].sum() * 100
)
# Layout of the Dash app
def _graph_section(heading, graph_id):
    """Return a margin-wrapped Div with a white H2 heading above an empty graph."""
    return html.Div(
        [
            html.H2(heading, style={'color': 'white'}),
            dcc.Graph(id=graph_id),
        ],
        style={'margin': '20px'},
    )


# Dark page with a title and three tabs; each graph is filled in by the
# callback whose Output targets its id.
app.layout = html.Div(
    style={'backgroundColor': '#1e1e1e', 'padding': '20px'},
    children=[
        html.H1('NEA Data Visualizations', style={'textAlign': 'center', 'color': 'white'}),
        dcc.Tabs([
            dcc.Tab(label='Certifications', children=[
                _graph_section('Certifications by Gender', 'certifications-by-gender'),
                _graph_section('Gender by University - Pareto Chart', 'pareto-gender-by-university'),
            ]),
            dcc.Tab(label='Grants', children=[
                _graph_section('NEA Grants Heatmap by State', 'heatmap-grants-by-state'),
                _graph_section('NEA Grants Choropleth Map', 'choropleth-map'),
            ]),
            dcc.Tab(label='Trends', children=[
                _graph_section('Male vs Female Grants Over the Years', 'gender-trends'),
            ]),
        ]),
    ],
)
# Callbacks to generate the graphs
@app.callback(
    Output('certifications-by-gender', 'figure'),
    Input('certifications-by-gender', 'id')
)
def update_certifications_by_gender(_):
    """Build the grouped histogram of certification counts split by gender.

    The single argument is the graph's own ``id`` property; it is ignored
    and only serves as the callback's input.
    """
    dark = '#1e1e1e'
    histogram = px.histogram(
        certifications,
        x='certification_type',
        color='gender',
        barmode='group',
        title='Count of Certifications by Gender',
    )
    # Match the dark page background
    histogram.update_layout(
        paper_bgcolor=dark,
        plot_bgcolor=dark,
        font_color='white',
    )
    return histogram
@app.callback(
    Output('pareto-gender-by-university', 'figure'),
    Input('pareto-gender-by-university', 'id')
)
def update_pareto_gender_by_university(_):
    """Build the Pareto chart from ``university_totals``.

    Bars show the count per institution on the left axis; a line with
    markers shows the cumulative percentage on a second, right-hand axis.
    The argument (the graph's ``id``) is ignored.
    """
    institutions = university_totals['institution']
    dark = '#1e1e1e'

    pareto = go.Figure()
    # Bars: count per institution
    pareto.add_bar(
        x=institutions,
        y=university_totals['count'],
        name='Count',
    )
    # Line: running share of the total, drawn against the secondary axis
    pareto.add_scatter(
        x=institutions,
        y=university_totals['cumulative_percentage'],
        mode='lines+markers',
        name='Cumulative Percentage',
        yaxis='y2',
    )
    pareto.update_layout(
        title='Gender by University - Pareto Chart',
        yaxis={'title': 'Count'},
        # y2 shares the plot area with y but is labelled on the right
        yaxis2={
            'title': 'Cumulative Percentage',
            'overlaying': 'y',
            'side': 'right',
        },
        plot_bgcolor=dark,
        paper_bgcolor=dark,
        font={'color': 'white'},
    )
    return pareto
@app.callback(
    Output('heatmap-grants-by-state', 'figure'),
    Input('heatmap-grants-by-state', 'id')
)
def update_heatmap_grants_by_state(_):
    """Build the state-by-year density heatmap of grant counts.

    Rows of ``data`` are counted per (``us_state``, ``nea_grant_year``)
    pair and the counts are used as the heatmap's ``z`` values. The
    argument (the graph's ``id``) is ignored.
    """
    per_state_year = (
        data
        .groupby(['us_state', 'nea_grant_year'])
        .size()
        .reset_index(name='count')
    )
    dark = '#1e1e1e'
    heatmap = px.density_heatmap(
        per_state_year,
        x='us_state',
        y='nea_grant_year',
        z='count',
        color_continuous_scale='Viridis',
        title='Grants Heatmap by State',
    )
    heatmap.update_layout(
        paper_bgcolor=dark,
        plot_bgcolor=dark,
        font_color='white',
    )
    return heatmap
@app.callback(
    Output('choropleth-map', 'figure'),
    Input('choropleth-map', 'id')
)
def update_choropleth_map(_):
    """Build the animated US choropleth of grant counts per state and year.

    The argument (the graph's ``id``) is ignored.

    Returns:
        A Plotly figure with one animation frame per ``grant_year``,
        played in chronological order.
    """
    # Plotly Express emits animation frames in order of first appearance.
    # `choropleth_data` is ordered by state first, so without this sort the
    # slider would jump between years instead of advancing chronologically.
    frames = choropleth_data.sort_values('grant_year', kind='stable')
    # The cleaned year column can be float when some cells failed to parse;
    # whole numbers keep slider labels as "1979" rather than "1979.0".
    frames = frames.astype({'grant_year': int})

    fig = px.choropleth(
        frames,
        locations='us_state',
        locationmode='USA-states',
        color='count',
        scope='usa',
        animation_frame='grant_year',
        title='NEA Grants by State Over Time',
        labels={'count': 'Number of Grants'}
    )
    fig.update_layout(
        # Transparent map background so the dark paper colour shows through
        geo=dict(bgcolor='rgba(0,0,0,0)'),
        plot_bgcolor='#1e1e1e',
        paper_bgcolor='#1e1e1e',
        font_color='white'
    )
    return fig
@app.callback(
    Output('gender-trends', 'figure'),
    Input('gender-trends', 'id')
)
def update_gender_trends(_):
    """Build the line chart of yearly grant counts, one line per gender.

    Reads the pre-aggregated ``gender_trends`` table; the argument (the
    graph's ``id``) is ignored.
    """
    dark = '#1e1e1e'
    trend_lines = px.line(
        gender_trends,
        x='grant_year',
        y='count',
        color='gender',
        title='Male vs Female Grants Over the Years',
    )
    trend_lines.update_layout(
        paper_bgcolor=dark,
        plot_bgcolor=dark,
        font_color='white',
    )
    return trend_lines
# Run the app
if __name__ == '__main__':
    # `app.run` replaces the legacy `app.run_server`, which Dash 3 removed;
    # `debug=True` enables hot reloading and the in-browser dev tools.
    app.run(debug=True)